###############################################################################   
#### Replication Materials                                                 #### 
#### Taegyoon Kim, 2022. Violent Political Rhetoric on Twitter.            ####
#### Political Science Research and Methods                                ####
###############################################################################  



#################################### Set Up ###################################


## packages

import pandas as pd
import numpy as np
from matplotlib.lines import Line2D
import matplotlib.dates as md
import matplotlib.pyplot as plt


## path

# Directories used throughout the script: input CSVs are read from path_data
# and figure PDFs are written to path_output. Both are relative paths and keep
# their trailing '/' because file names are appended by string concatenation.
path_data = 'kim_psrm_replication/data/' 
path_output = 'kim_psrm_replication/output/' 



################################### Figure 3 ##################################


## load data

# Timeline table holding the 'dates' column and the two moving-average
# series plotted below.
df_timeline = pd.read_csv(path_data + 'df_timeline.csv')


## generate figure 3 (run all at once)

# Parse the 'YYYY-MM-DD' strings into datetimes so the date locators and
# formatters configured below can be applied to the x-axis.
df_timeline['dates'] = pd.to_datetime(df_timeline['dates'], format='%Y-%m-%d')

# Settings shared by both series; they differ only in the plotted column,
# the target axis and the line style.
shared_line_kwargs = {
    'x': 'dates',
    'marker': '.',
    'legend': False,
    'color': 'black',
}

# Left y-axis: count series (default solid line). This call creates the figure.
ax = df_timeline.plot(y='tweet_violent_ma',
                      figsize=(15, 11),
                      **shared_line_kwargs)

# Right y-axis: proportion series (dotted line) on a twin that shares the x-axis.
ax2 = ax.twinx()
df_timeline.plot(y='tweet_violent_to_political_ma',
                 ax=ax2,
                 linestyle=':',
                 **shared_line_kwargs)

# x-axis ticks: a major tick on every Monday, labelled as abbreviated month
# over day of month, plus a minor tick for every single day.
date_axis = ax.xaxis
date_axis.set_major_locator(md.WeekdayLocator(byweekday=0))
date_axis.set_major_formatter(md.DateFormatter('%b\n%d'))
date_axis.set_minor_locator(md.DayLocator(interval=1))

# Keep the major date labels horizontal.
plt.setp(date_axis.get_majorticklabels(), rotation=0)

# Tick lengths on the date axis, then a common label size on both y-axes.
ax.tick_params(axis='x', which='major', length=20)
ax.tick_params(axis='x', which='minor', length=7.5)
for twin_axis in (ax, ax2):
    twin_axis.tick_params(axis='both', which='major', labelsize=17.5)

# Both series were drawn with legend=False, so the legend is assembled from
# proxy lines that mirror the two line styles.
proxy_kwargs = {'color': 'black', 'lw': 2}
count_line = Line2D(
    [0], [0],
    label='Count of Tweets with Violent Political Rhetoric',
    linestyle='-',
    **proxy_kwargs
)
proportion_line = Line2D(
    [0], [0],
    label='Proportion of Tweets with Violent Political Rhetoric to Political Tweets',
    linestyle=':',
    **proxy_kwargs
)
plt.legend(handles=[count_line, proportion_line], fontsize=17.5)

# Grid lines for the left (count) y-axis only.
ax.yaxis.grid()

# Axis titles; the embedded newlines pad the titles away from the tick labels.
ax.set_xlabel('\nDates', fontsize=20)
ax.set_ylabel('Count (three-day moving average)\n', color='black', fontsize=20)
ax2.set_ylabel('\nProportion (three-day moving average)', color='black', fontsize=20)

plt.savefig(path_output + 'fig3.pdf')



########################### Figure 5a & Figure 5c ############################


## load data

# Ideology tables for the violent and non-violent groups; both provide the
# 'ideology_final' column used by figures 5a and 5c.
df_ideology_violent = pd.read_csv(path_data + 'df_ideology_violent.csv')
df_ideology_nonviolent = pd.read_csv(path_data + 'df_ideology_nonviolent.csv')


## plot figure 5a (run all at once)

plt.figure(figsize=(10, 7))

# 100 equally spaced edges on [-4, 4], shared by both histograms so their
# bars line up.
bins = np.linspace(-4, 4, 100)

# (scores, legend label, colour) for each group, in drawing order.
groups_5a = (
    (df_ideology_violent['ideology_final'], 'Violent', 'red'),
    (df_ideology_nonviolent['ideology_final'], 'Non-violent', 'skyblue'),
)

# Overlaid, semi-transparent density histograms (density=True normalises each
# group separately, so the two groups are comparable despite different sizes).
for scores, group_label, group_color in groups_5a:
    plt.hist(scores,
             bins,
             alpha=0.75,
             label=group_label,
             density=True,
             color=group_color)

plt.legend(loc='upper right', fontsize=15)

plt.xlabel('\nEstimated Ideology Score', fontsize=15)
plt.ylabel('Density\n', fontsize=15)

plt.tick_params(axis='both', which='major', labelsize=13.5)

# Thin vertical line at each group's mean score, in the group's colour.
for scores, _, group_color in groups_5a:
    plt.axvline(scores.mean(),
                color=group_color,
                linestyle='-',
                linewidth=1)

plt.savefig(path_output + 'fig5a.pdf')


## plot figure 5c (run all at once)

# Absolute value of the ideology score, stored as a new column on each table
# (relies on df_ideology_violent / df_ideology_nonviolent loaded for figure 5a).
df_ideology_violent['ideology_final_abs'] = df_ideology_violent['ideology_final'].abs()
df_ideology_nonviolent['ideology_final_abs'] = df_ideology_nonviolent['ideology_final'].abs()


plt.figure(figsize=(10, 7))

# 100 equally spaced edges on [0, 4]; absolute values cannot be negative.
bins = np.linspace(0, 4, 100)

# (scores, legend label, colour) for each group, in drawing order.
groups_5c = (
    (df_ideology_violent['ideology_final_abs'], 'Violent', 'red'),
    (df_ideology_nonviolent['ideology_final_abs'], 'Non-violent', 'skyblue'),
)

# Overlaid, semi-transparent density histograms on the shared bin edges.
for scores, group_label, group_color in groups_5c:
    plt.hist(scores,
             bins,
             alpha=0.75,
             label=group_label,
             density=True,
             color=group_color)

plt.legend(loc='upper left', fontsize=15)

# Axis label spelling corrected ("Absolute").
plt.xlabel('\nEstimated Ideology Score: Absolute Value', fontsize=15)
plt.ylabel('Density\n', fontsize=15)

plt.tick_params(axis='both', which='major', labelsize=13.5)

# Thin vertical line at each group's mean absolute score, in the group's colour.
for scores, _, group_color in groups_5c:
    plt.axvline(scores.mean(),
                color=group_color,
                linestyle='-',
                linewidth=1)

plt.savefig(path_output + 'fig5c.pdf')



################################### Figure a2 #################################


## load data

# Same 'ideology_final' column as used for figure 5a, read from the
# "*_without_trump" versions of the two tables.
df_ideology_violent_without_trump = pd.read_csv(
    path_data + 'df_ideology_violent_without_trump.csv'
)
df_ideology_nonviolent_without_trump = pd.read_csv(
    path_data + 'df_ideology_nonviolent_without_trump.csv'
)


## plot figure a2 (run all at once)

plt.figure(figsize=(10, 7))

# 100 equally spaced edges on [-4, 4], shared by both histograms.
bins = np.linspace(-4, 4, 100)

# (scores, legend label, colour) for each group, in drawing order.
groups_a2 = (
    (df_ideology_violent_without_trump['ideology_final'], 'Violent', 'red'),
    (df_ideology_nonviolent_without_trump['ideology_final'], 'Non-violent', 'skyblue'),
)

# Overlaid, semi-transparent density histograms on the shared bin edges.
for scores, group_label, group_color in groups_a2:
    plt.hist(scores,
             bins,
             alpha=0.75,
             label=group_label,
             density=True,
             color=group_color)

plt.legend(loc='upper right', fontsize=15)

plt.xlabel('\nEstimated Ideology Score', fontsize=15)
plt.ylabel('Density\n', fontsize=15)

plt.tick_params(axis='both', which='major', labelsize=13.5)

# Thin vertical line at each group's mean score, in the group's colour.
for scores, _, group_color in groups_a2:
    plt.axvline(scores.mean(),
                color=group_color,
                linestyle='-',
                linewidth=1)

plt.savefig(path_output + 'figa2.pdf')



################################### Figure 6c #################################


## load data

# Tables with the 'distance_1' / 'distance_2' / 'distance_3' columns that are
# summed below, one table per tweet group.
df_distance_nonviolent = pd.read_csv(path_data + 'df_distance_nonviolent.csv')
df_distance_violent = pd.read_csv(path_data + 'df_distance_violent.csv')


## plot figure 6c (run all at once)

# Column totals per distance bucket for the non-violent comparison group.
# NOTE: the "pol_*" / "Political" names below refer to this non-violent group
# (df_distance_nonviolent), which the legend calls 'Non-violent Tweets'.
pol_count_1 = int(df_distance_nonviolent['distance_1'].sum())
pol_count_2 = int(df_distance_nonviolent['distance_2'].sum())
pol_count_3 = int(df_distance_nonviolent['distance_3'].sum())
pol_count_all = pol_count_1 + pol_count_2 + pol_count_3

# Shares of each bucket; values recorded in the original script:
# 0.6680672268907563, 0.2689075630252101, 0.06302521008403361
print(pol_count_1 / pol_count_all)
print(pol_count_2 / pol_count_all)
print(pol_count_3 / pol_count_all)

# Column totals per distance bucket for the violent group; count / percentage
# recorded in the original script:
# 15749.0 / 61.911314, 7906.0 / 31.079487, 1783.0 / 7.009199
vio_count_1 = int(df_distance_violent['distance_1'].sum())
vio_count_2 = int(df_distance_violent['distance_2'].sum())
vio_count_3 = int(df_distance_violent['distance_3'].sum())
vio_count_all = vio_count_1 + vio_count_2 + vio_count_3

print(vio_count_1 / vio_count_all)
print(vio_count_2 / vio_count_all)
print(vio_count_3 / vio_count_all)

# Percentages (0-100) within each group, gathered into one summary table.
vio_counts = np.array([vio_count_1, vio_count_2, vio_count_3]).astype(float)
pol_counts = np.array([pol_count_1, pol_count_2, pol_count_3]).astype(float)
vio_perc = 100 * vio_counts / vio_counts.sum()
pol_perc = 100 * pol_counts / pol_counts.sum()
df = pd.DataFrame({"Distance": ['One', 'Two', 'Three or more'],
                   "Violent Count": vio_counts,
                   "Violent Proportion": vio_perc,
                   "Political Count": pol_counts,
                   "Political Proportion": pol_perc
                   })

n = 3 # numbers of pairs of bars
ind = np.arange(n) # position of bars on x-axis
plt.figure(figsize=(11, 7)) # figure size
width = 0.3 # width of a bar

# Side-by-side bars: the violent bar is centred at ind, the non-violent bar
# one bar-width to its right.
plt.bar(ind,
        df['Violent Proportion'],
        width,
        label = 'Violent Tweets',
        color = 'red',
        alpha = 0.75)
plt.bar(ind + width,
        df['Political Proportion'],
        width,
        label = 'Non-violent Tweets',
        color = 'skyblue',
        alpha = 0.75)

plt.xlabel('\nEstimated Shortest Distance', size = 15)
plt.ylabel('Proportion\n', size = 15)

# Centre each tick label between the two bars of its pair.
plt.xticks(ind + width / 2, ('One', 'Two', 'Three or more'), size = 15)
plt.yticks(size = 15)

plt.legend(loc = 'best', fontsize = 15)

plt.ylim([0, 80])

# Annotate every bar with its percentage rounded to a whole number. Labels
# and positions are derived from the data rather than typed in by hand, so
# they cannot drift from the bars; each label is centred on its bar and sits
# 2.5 units above the rounded percentage.
label_offset = 2.5
for bar_centres, percentages in ((ind, vio_perc), (ind + width, pol_perc)):
    for bar_x, pct in zip(bar_centres, percentages):
        rounded_pct = int(round(float(pct)))
        plt.text(x = bar_x,
                 y = rounded_pct + label_offset,
                 s = '{}%'.format(rounded_pct),
                 fontsize = 15,
                 ha = 'center')

plt.tight_layout()
plt.savefig(path_output + 'fig6c.pdf')
